

* Directory globals for the project's Dropbox folders (the original note
* labels these as Margaret's). NCREIF is an absolute, machine-specific root
* and must be defined first: build and analysis are derived from it, and
* results and figures are derived from analysis.
global NCREIF   "/Users/BeckaBrolinson/Dropbox/NCREIF/data"
global build    "$NCREIF/build"
global analysis "$NCREIF/analysis"
global results  "$analysis/results"
global figures  "$analysis/figures"


*------------------------------------------------------------------------------*
*	Step 14- EPA Energy Star Data	   *
*------------------------------------------------------------------------------*

* Load the label building registry CSV from the build folder, replacing
* whatever data is currently in memory.
import delimited "$build/labelbuildingregistry.csv", clear

* certificationyears and scores each hold a comma-separated list. Break every
* list into numbered string columns (certificationyears1, scores1, ...) and
* discard the combined column so its name is free to serve as the reshape stub.
	foreach listvar of varlist certificationyears scores {
		split `listvar', p(,)
		drop `listvar'
	}

* Keep a single row per propertyplantid (whichever row sorts first within
* the id); reshape below requires the id to be unique.
	bysort propertyplantid: keep if _n == 1

* Wide to long: one row per property and list position, indexed by count.
	reshape long certificationyears scores, i(propertyplantid) j(count)

* Convert both columns to numeric. The force option turns any entry that is
* not a clean number into missing instead of stopping with an error.
	destring certificationyears scores, replace force

* List positions without a certification year are padding from the reshape
* (or unparseable entries) and are removed.
	keep if !missing(certificationyears)
	
* Summary statistics to plot. Each egen result is constant within its group,
* so any single row of a group carries the group's value.
	gen one = 1

	* Per certification year: number of rows, then mean and median score.
	sort certificationyears
	by certificationyears: egen cert_count = sum(one)
	foreach stat in mean median {
		by certificationyears: egen score_`stat' = `stat'(scores)
	}

	* Same three statistics within certification year by property type.
	sort certificationyears propertytype
	by certificationyears propertytype: egen cert_count_type = sum(one)
	foreach stat in mean median {
		by certificationyears propertytype: egen score_`stat'_type = `stat'(scores)
	}


* Trend plot for all property types: row count on the left axis, mean
* score on the right axis, by certification year.
	preserve
		* The year-level statistics repeat on every row of a year, so one row
		* per year is all the line plot needs. The collapse is temporary and
		* is undone by restore.
		bysort certificationyears: keep if _n == 1

		* 2019 is left out of the figure (presumably an incomplete year - TODO confirm).
		keep if certificationyears != 2019

		twoway ///
			(line cert_count certificationyears, ///
				yaxis(1) ytitle("Building Count", axis(1))) ///
			(line score_mean certificationyears, ///
				yaxis(2) ytitle("Energy Star Score", axis(2)) ///
				lcolor(cranberry) lpattern(dash) ///
				ylab(,nogrid) bgcolor(white) graphregion(color(white)) ///
				xtitle("Year") xlabel(2000[2]2018) ///
				legend(lab(1 "Building Count") lab(2 "Mean Energy Star Score")))
		graph export "$figures/17_estartrend_year.png", replace
	restore
			
* Trend plot restricted to offices.

	* This restriction is permanent: it sits outside the preserve below, so
	* everything after this point works on office rows only.
	keep if propertytype == "Office"

	preserve
		* With only offices left, the year-by-type statistics are constant
		* within a year, so one row per year is enough for the plot.
		bysort certificationyears: keep if _n == 1

		* 2019 is left out of the figure (presumably an incomplete year - TODO confirm).
		keep if certificationyears != 2019

		twoway ///
			(line cert_count_type certificationyears, ///
				yaxis(1) ytitle("Office Count", axis(1))) ///
			(line score_mean_type certificationyears, ///
				yaxis(2) ytitle("Energy Star Score", axis(2)) ///
				lcolor(cranberry) lpattern(dash) ///
				ylab(,nogrid) bgcolor(white) graphregion(color(white)) ///
				xtitle("Year") xlabel(2000[2]2018) ///
				legend(lab(1 "Office Count") lab(2 "Mean Energy Star Score")))
		graph export "$figures/18_estartrend_office.png", replace
	restore

*Count the number of office buildings that ever get certified
	* Restrict to certifications dated 2000 through 2015. The filter is on
	* certificationyears because this script never creates a variable called
	* year: the earlier condition on year would either stop with a
	* variable-not-found error or, through variable-name abbreviation, match a
	* different imported column (TODO confirm the CSV has no year column of its own).
	keep if inrange(certificationyears, 2000, 2015)

	* Display the number of distinct buildings. The one-row-per-building
	* collapse is wrapped in preserve/restore so the long data stays in memory.
	preserve
		bysort propertyplantid: keep if _n == 1
		count
	restore

	*Between 2000 and 2015 2,555 unique office buildings get certified
	* NOTE(review): the figure above was recorded before the filter was
	* switched to certificationyears; re-check it against the count output.

	
